---
title: "Kern and Hainmueller (2009, PA): Extrapolation (Non-linear Program)"
author: "Arthur Zeyang Yu"
date: "June 5, 2023"
output: html_document
---

```{r setup, include = TRUE}
knitr::opts_chunk$set(echo = TRUE)

# clear memory
rm(list = ls())

# use packages
library("readstata13") # package for reading stata 13 file
library("foreign")
library("AER") # package for instrumental variable
library("boot") # package for bootstrap
library("tidyverse")
library("xlsx") # package for write results in xlsx

# set directories
data_path <- "C:/Users/arthu/Dropbox/research/mte_at/codes/replication/data"
results_path <- "C:/Users/arthu/Dropbox/research/mte_at/codes/replication/results"

# read data
kh_2009pa <- read.dta13(paste0(data_path, "/kh_2009pa_cleaned.dta"))

# rename outcome variable lenin to y
kh_2009pa <- kh_2009pa %>% mutate(y = lenin)
kh_2009pa <- kh_2009pa %>% mutate(d_0 = 1 - treatment)
```

# compute identifiable quantities
```{r}
# first-stage in KH (2009, PA)
first_stage <- lm(treatment ~ iv, data = kh_2009pa)
first_stage <- summary(first_stage)$coefficients[2, 1]

# reduced form in KH (2009, PA)
reduced_form <- lm(y ~ iv, data = kh_2009pa)
reduced_form <- summary(reduced_form)$coefficients[2, 1]

# late in KH (2009, PA)
late <- ivreg(y ~ treatment | iv, data = kh_2009pa)
late <- summary(late)$coefficients[2, 1]

# compute E[Y | D = d, Z = z]
for(i in 0:1){
  
  for(j in 0:1){
    
    assign(paste0("e_y_d", i, "z", j), 
           mean(kh_2009pa[kh_2009pa$treatment == i
                          & kh_2009pa$iv == j,
                          "y"]))
    
  }
  
}

# compute P[D = d, Z = z]
for(i in 0:1){
  
  for(j in 0:1){
    
    assign(paste0("p_d", i, "z", j),
           sum(with(kh_2009pa, treatment == i & iv == j))/nrow(kh_2009pa))
    
  }
  
}

# compute E[1{D = d, Z = z}Y] (IV-like estimands)
for(i in 0:1){
  
  for(j in 0:1){
    
    nam1 <- paste0("e_y_d", i, "z", j)
    nam2 <- paste0("p_d", i, "z", j)
    assign(paste0("e_d", i, "z", j, "y"),
           get(nam1) * get(nam2))
    
  }
  
}

# compute: E[Y(d) | P(z) = z, D = d]
for(i in 0:1){
  
  for(j in 0:1){
    
    assign(paste0("e_yd_p", i, "d", j), 
           mean(kh_2009pa[kh_2009pa$treatment == j 
                          & kh_2009pa$iv == i, 
                          "y"]))
    
  }
  
}

# compute: proportion of AT, C, and NT
c_prop <- first_stage
c_prop

nt_prop <- mean(kh_2009pa[kh_2009pa$iv == 1, "d_0"])
nt_prop

at_prop <- mean(kh_2009pa[kh_2009pa$iv == 0, "treatment"])
at_prop

# compute: propensity score
for(i in 0:1){
  
  assign(paste0("p", i),
         mean(kh_2009pa[kh_2009pa$iv == i, "treatment"]))
  
}

# compute: E[Y] and Var(Y)
e_y <- mean(kh_2009pa$y)
sd_y <- sd(kh_2009pa$y)

# compute: E[D] and Var(D)
e_d <- mean(kh_2009pa$treatment)
sd_d <- sd(kh_2009pa$treatment)
var_d <- var(kh_2009pa$treatment)

# compute: P[D = 0]
p_d0 <- 1 - e_d

# compute: E[Z] and Var(Z)
e_z <- mean(kh_2009pa$iv)
sd_z <- sd(kh_2009pa$iv)

# compute: cov(D, Z)
cov_dz <- cov(kh_2009pa$treatment, kh_2009pa$iv)

# compute: E[Y(1) | D(1) = D(0) = 1] = E[Y | D = 1, Z = 0]
e_y1_at <- mean(kh_2009pa[kh_2009pa$treatment == 1 &
                            kh_2009pa$iv == 0, "y"])
e_y1_at

# compute: E[Y(0) | D(1) = D(0) = 0] = E[Y | D = 0, Z = 1]
e_y0_nt <- mean(kh_2009pa[kh_2009pa$treatment == 0 &
                            kh_2009pa$iv == 1, "y"])
e_y0_nt

# compute: compliers' E[Y(1) | C] and E[Y(0) | C]
## compute: E[YD | Z = z] & E[Y(1 - D) | Z = z]
kh_2009pa$yd <- kh_2009pa$y * kh_2009pa$treatment
kh_2009pa$yd_rev <- kh_2009pa$y * kh_2009pa$d_0
e_yd_z1 <- mean(kh_2009pa[kh_2009pa$iv == 1, "yd"])
e_yd_z0 <- mean(kh_2009pa[kh_2009pa$iv == 0, "yd"])
e_yd_rev_z1 <- mean(kh_2009pa[kh_2009pa$iv == 1, "yd_rev"])
e_yd_rev_z0 <- mean(kh_2009pa[kh_2009pa$iv == 0, "yd_rev"])
## compute: E[1 - D | Z = z]
e_d_rev_z1 <- mean(kh_2009pa[kh_2009pa$iv == 1, "d_0"])
e_d_rev_z0 <- mean(kh_2009pa[kh_2009pa$iv == 0, "d_0"])
## compute: complier
e_y1_c <- (e_yd_z1 - e_yd_z0)/(p1 - p0)
e_y1_c
e_y0_c <- (e_yd_rev_z1 - e_yd_rev_z0)/(e_d_rev_z1 - e_d_rev_z0)
e_y0_c

# compute: delta(at & nt) in STR & SMTR assumptions
delta_at <- (p1 - p0) * e_y0_c + (1 - p1) * e_y0_nt
delta_nt <- (p1 - p0) * e_y1_c + p0 * e_y1_at
```

# compute point identification results (replicate Section 4.3 Extrapolation 1: Point Estimates by Linearizing MTR Pairs)
```{r}
# compute: \mu_{0} and \alpha_{0} by solving system of equations
A0 <- matrix(c(1, 1, (p0/2), (p1/2)), 2, 2)
b0 <- c(e_yd_p0d0, e_yd_p1d0)
mu_0 <- solve(A0, b0)[1]
mu_0
alpha_0 <- solve(A0, b0)[2]
alpha_0

# compute: \mu_{1} and \alpha_{1} by solving system of equations
A1 <- matrix(c(1, 1, ((p0 - 1)/2), ((p1 - 1)/2)), 2, 2)
b1 <- c(e_yd_p0d1, e_yd_p1d1)
mu_1 <- solve(A1, b1)[1]
mu_1
alpha_1 <- solve(A1, b1)[2]
alpha_1

# define: approximated MTR pairs based on linearity assumption
m0_u_l <- function(x) {
  y <- mu_0 + alpha_0 * x - 0.5 * alpha_0
  return(y)
}

m1_u_l <- function(x) {
  y <- mu_1 + alpha_1 * x - 0.5 * alpha_1
  return(y)
}

# compute: ATE of AT
ate_at_lin_mte <- e_y1_at - (1/p0) * integrate(m0_u_l, 0, p0)$value
ate_at_lin_mte

# compute: ATE of NT
ate_nt_lin_mte <- (1/(1 - p1)) * integrate(m1_u_l, p1, 1)$value - e_y0_nt
ate_nt_lin_mte
```

# replicate Table 3
```{r}
# prepare a matrix for storing results
linear_extrap_equ <- matrix(NA, 3, 4)

# store proportion of compliance types
linear_extrap_equ[1,1] <- at_prop
linear_extrap_equ[1,2] <- c_prop
linear_extrap_equ[1,3] <- nt_prop

# store support results for T = 0
linear_extrap_equ[2,1] <- e_y1_at - ate_at_lin_mte
linear_extrap_equ[2,2] <- e_y0_c
linear_extrap_equ[2,3] <- e_y0_nt
linear_extrap_equ[2,4] <- linear_extrap_equ[1,1:3] %*% linear_extrap_equ[2,1:3]

# store support results for T = 1
linear_extrap_equ[3,1] <- e_y1_at
linear_extrap_equ[3,2] <- e_y1_c
linear_extrap_equ[3,3] <- ate_nt_lin_mte + e_y0_nt
linear_extrap_equ[3,4] <- linear_extrap_equ[1,1:3] %*% linear_extrap_equ[3,1:3]

# round up the results
linear_extrap_equ <- round(linear_extrap_equ, digits = 3)

# save results in Excel
write.xlsx(linear_extrap_equ,
           file = paste0(results_path, "/table_3.xlsx"),
           sheetName = "Sheet1")
```


# compute Manski and MTR bounds (replicate Table J1: Manski and MTR Bounds in Kern and Hainmueller (2009))
```{r}
# ATE of always-takers: Manski bound
bd_at_manski_l <- e_y1_at - 1
bd_at_manski_u <- e_y1_at - 0
bd_at_manski_l
bd_at_manski_u

# ATE of never-takers: Manski bound
bd_nt_manski_l <- 0 - e_y0_nt
bd_nt_manski_u <- 1 - e_y0_nt
bd_nt_manski_l
bd_nt_manski_u
```

# compute STR and SMTR bounds (replicate Table J2: Sensitivity Analysis of b in STR and SMTR Assumptions)
```{r}
# compute: define sequence of b and results matrix
b_seq <- c(0.005, 0.01, 0.015, 0.03, 0.05, 0.07, 0.1, 0.12)
# results matrix for STR
sens_b_str <- matrix(NA, nrow = 8, ncol = 5)
# results matrix for SMTR
sens_b_smtr <- matrix(NA, nrow = 8, ncol = 5)

# compute: bounds of STR (loop over different b)
for(i in seq(1, 8)){
  
  # write b in b and write b in mat
  b <- b_seq[i]
  sens_b_str[i, 1] <- b_seq[i]
  
  # bounds for AT
  sens_b_str[i, 2] <- e_y1_at - (1/p0) * (e_y + b * e_d - delta_at)
  sens_b_str[i, 2] <- round(sens_b_str[i, 2], digits = 4)
  sens_b_str[i, 3] <- e_y1_at - (1/p0) * (e_y - b * e_d - delta_at)
  sens_b_str[i, 3] <- round(sens_b_str[i, 3], digits = 4)
  
  # bounds for NT
  sens_b_str[i, 4] <- (1/(1 - p1)) * (e_y - b * p_d0 - delta_nt) - e_y0_nt
  sens_b_str[i, 4] <- round(sens_b_str[i, 4], digits = 4)
  sens_b_str[i, 5] <- (1/(1 - p1)) * (e_y + b * p_d0 - delta_nt) - e_y0_nt
  sens_b_str[i, 5] <- round(sens_b_str[i, 5], digits = 4)
  
}

# compute: bounds of SMTR (loop over different b)
for(i in seq(1, 8)){
  
  # write b in b and write b in mat
  b <- b_seq[i]
  sens_b_smtr[i, 1] <- b_seq[i]
  
  # bounds for AT
  sens_b_smtr[i, 2] <- e_y1_at - (1/p0) * (e_y - delta_at)
  sens_b_smtr[i, 2] <- round(sens_b_smtr[i, 2], digits = 4)
  sens_b_smtr[i, 3] <- e_y1_at - (1/p0) * (e_y - (b * e_d) - delta_at)
  sens_b_smtr[i, 3] <- round(sens_b_smtr[i, 3], digits = 4)
  
  # bounds for NT
  sens_b_smtr[i, 4] <- (1/(1 - p1)) * (e_y - delta_nt) - e_y0_nt
  sens_b_smtr[i, 4] <- round(sens_b_smtr[i, 4], digits = 4)
  sens_b_smtr[i, 5] <- (1/(1 - p1)) * (e_y + b * p_d0 - delta_nt) - e_y0_nt
  sens_b_smtr[i, 5] <- round(sens_b_smtr[i, 5], digits = 4)
  
}

# display results in r markdown
sens_b_str
sens_b_smtr

# save results in Excel
write.xlsx(sens_b_str,
           file = paste0(results_path, "/table_j2_panela.xlsx"),
           sheetName = "Sheet1")

write.xlsx(sens_b_smtr,
           file = paste0(results_path, "/table_j2_panelb.xlsx"),
           sheetName = "Sheet1")
```
